<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Prompt GPT-4o audio</title>
    <style>
        /* Size every element by its border box so padding and borders do not widen it. */
        * {
            box-sizing: border-box;
        }
        /* Page shell: a single centred column capped at 800px, using the system UI font. */
        body {
            font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            max-width: 800px;
            margin: 20px auto;
            padding: 0 20px;
            line-height: 1.6;
        }
        /* Pale-blue callout box with a darker blue left edge. */
        .info {
            background: #e8f4ff;
            padding: 15px;
            border-radius: 4px;
            margin: 20px 0;
            border-left: 4px solid #0066cc;
        }
        /* Vertical stack holding the labels, textareas, pickers and submit button. */
        .input-group {
            display: flex;
            flex-direction: column;
            gap: 12px;
            margin-bottom: 20px;
        }
        /* Shared look for every textarea; they may only be resized vertically. */
        textarea {
            width: 100%;
            padding: 12px;
            font-size: 16px;
            border: 1px solid #ccc;
            border-radius: 4px;
            resize: vertical;
            font-family: inherit;
        }
        /* Per-field minimum heights: the system prompt is the shortest field. */
        #systemPrompt {
            min-height: 60px;
        }
        #promptInput {
            min-height: 150px;
        }
        /* Read-only API response viewer: monospace text on a grey background. */
        #responseJson {
            min-height: 200px;
            font-family: monospace;
            background: #f5f5f5;
        }
        /* Drop-downs (model and voice pickers), capped at 200px wide. */
        select {
            padding: 8px 12px;
            font-size: 16px;
            border: 1px solid #ccc;
            border-radius: 4px;
            max-width: 200px;
        }
        /* Default button: solid blue, capped at 200px wide. */
        button {
            padding: 8px 16px;
            font-size: 16px;
            background: #0066cc;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            max-width: 200px;
        }
        /* Disabled buttons are greyed out and must not advertise themselves as
           clickable, so the pointer cursor set above is overridden here. */
        button:disabled {
            background: #cccccc;
            cursor: not-allowed;
        }
        /* Darken on hover, but only while the button can actually be pressed. */
        button:hover:not(:disabled) {
            background: #0055aa;
        }
        /* Red text for the error message container. */
        .error {
            color: #cc0000;
            margin: 10px 0;
        }
        /* Wrapper around the audio element, download button and transcript. */
        .player-container {
            margin: 20px 0;
        }
        audio {
            width: 100%;
            margin: 10px 0;
        }
        /* Grey panel that holds the transcript text. */
        .transcript {
            background: #f5f5f5;
            padding: 15px;
            border-radius: 4px;
            margin: 10px 0;
        }
        /* Muted italic text; no element in the markup or script visible in this file uses this class. */
        .loading {
            color: #666;
            font-style: italic;
        }
        /* Wrapper around the Gist controls and the raw API response. */
        .json-container {
            margin-top: 20px;
        }
        /* Green variant used by the "Copy to clipboard" button. */
        .copy-button {
            margin-top: 8px;
            background: #4CAF50;
        }
        .copy-button:hover:not(:disabled) {
            background: #45a049;
        }
        /* Green variant used by the "Save as Gist" button. */
        .gist-button {
            background: #2ea44f;
            margin-right: 10px;
        }
        .gist-button:hover:not(:disabled) {
            background: #2c974b;
        }
        /* Links to a saved Gist, one per line. */
        .gist-links {
            margin: 10px 0;
        }
        .gist-links a {
            display: block;
            margin: 5px 0;
            color: #0066cc;
        }
        /* The auth anchor has no href, so link styling is applied explicitly. */
        #authLink {
            color: #0066cc;
            cursor: pointer;
            text-decoration: underline;
        }
        /* Lays the two drop-downs out side by side. */
        .select-group {
            display: flex;
            gap: 12px;
        }
    </style>
</head>
<body>
    <h1>Prompt GPT-4o audio</h1>
    <div class="info">
        Enter a prompt below and execute against the selected model to hear the results.
    </div>

    <!-- Prompt form: optional system prompt, user prompt, model/voice pickers and the submit button. -->
    <div class="input-group">
        <!-- Each textarea takes its accessible name from its <label for>; an extra
             aria-label would override the label and disagree with the visible text. -->
        <label for="systemPrompt">System Prompt (optional):</label>
        <textarea id="systemPrompt" placeholder="Enter system prompt here..."></textarea>

        <label for="promptInput">User Prompt:</label>
        <textarea id="promptInput" placeholder="Enter your text here..."></textarea>

        <!-- The pickers have no visible label, so aria-label supplies their accessible names. -->
        <div class="select-group">
            <select id="modelSelect" aria-label="Model selection">
                <option value="gpt-4o-mini-audio-preview-2024-12-17">GPT-4o mini audio preview (Dec 17, 2024)</option>
                <option value="gpt-4o-audio-preview-2024-12-17">GPT-4o audio preview (Dec 17, 2024)</option>
                <option value="gpt-4o-audio-preview-2024-10-01">GPT-4o audio preview (Oct 1, 2024)</option>
            </select>
            <select id="voiceSelect" aria-label="Voice selection">
                <optgroup label="Recommended voices">
                    <option value="ash">Ash</option>
                    <option value="ballad">Ballad</option>
                    <option value="coral">Coral</option>
                    <option value="sage">Sage</option>
                    <option value="verse">Verse</option>
                </optgroup>
                <optgroup label="Less expressive">
                    <option value="alloy">Alloy</option>
                    <option value="echo">Echo</option>
                    <option value="shimmer">Shimmer</option>
                </optgroup>
            </select>
        </div>
        <!-- Explicit type: the default "submit" would submit a form if one were ever wrapped around this group. -->
        <button type="button" id="submitBtn">Generate Speech</button>
    </div>
    
    <!-- Error message container, revealed by the script. role="alert" makes it a
         live region so assistive technology announces the message when it appears. -->
    <div id="error" class="error" role="alert" style="display: none;"></div>
    <!-- Playback panel, revealed by the script once audio has been generated. -->
    <div id="playerContainer" class="player-container" style="display: none;">
        <audio id="audioPlayer" controls></audio>
        <button type="button" id="downloadBtn">Download Audio</button>
        <div id="transcript" class="transcript"></div>
    </div>

    <!-- Raw API response panel with Gist controls; revealed by the script after a successful API call. -->
    <div id="jsonContainer" class="json-container" style="display: none;">
        <div id="gistContainer">
            <!-- The script shows either the auth link or the save button, depending on whether a GitHub token is stored. -->
            <span id="authLinkContainer" style="display: none;">
                <a id="authLink">Authenticate with GitHub</a>
            </span>
            <button type="button" id="saveGistBtn" class="gist-button" style="display: none;">Save as Gist</button>
            <div id="gistLinks" class="gist-links"></div>
        </div>
        <h3 id="responseHeading">API Response:</h3>
        <!-- The heading doubles as the textarea's accessible name. -->
        <textarea id="responseJson" aria-labelledby="responseHeading" readonly></textarea>
        <button type="button" id="copyJsonBtn" class="copy-button">Copy to clipboard</button>
    </div>

    <script>
        // Resolve, once at load, every element the script reads or updates.
        const byId = (id) => document.getElementById(id);

        // Prompt form controls.
        const promptInput = byId('promptInput');
        const systemPrompt = byId('systemPrompt');
        const modelSelect = byId('modelSelect');
        const voiceSelect = byId('voiceSelect');
        const submitBtn = byId('submitBtn');

        // Error message and playback panel.
        const errorDiv = byId('error');
        const playerContainer = byId('playerContainer');
        const audioPlayer = byId('audioPlayer');
        const downloadBtn = byId('downloadBtn');
        const transcriptDiv = byId('transcript');

        // Raw response panel and Gist controls.
        const jsonContainer = byId('jsonContainer');
        const responseJson = byId('responseJson');
        const copyJsonBtn = byId('copyJsonBtn');
        const saveGistBtn = byId('saveGistBtn');
        const authLinkContainer = byId('authLinkContainer');
        const authLink = byId('authLink');
        const gistLinks = byId('gistLinks');

        /**
         * Show an error message and hide both result panels (player and raw JSON).
         * @param {string} message Text to display in the error container.
         */
        function showError(message) {
            for (const panel of [playerContainer, jsonContainer]) {
                panel.style.display = 'none';
            }
            errorDiv.textContent = message;
            errorDiv.style.display = 'block';
        }

        /** Hide the error container; its text is left in place. */
        function clearError() {
            errorDiv.style.setProperty('display', 'none');
        }

        /**
         * Sync the Gist controls with the stored GitHub token: show the
         * "Save as Gist" button when a token exists, otherwise the auth link.
         */
        function checkGithubAuth() {
            const hasToken = Boolean(localStorage.getItem('GITHUB_TOKEN'));
            saveGistBtn.style.display = hasToken ? 'inline-block' : 'none';
            authLinkContainer.style.display = hasToken ? 'none' : 'inline-block';
        }

        // Handle of the interval currently polling for a token, or null when idle.
        let authPollTimer = null;

        /**
         * Poll localStorage once a second until a GitHub token appears, then
         * refresh the Gist controls.
         *
         * Only one poll runs at a time: starting a new one cancels the previous
         * one, so repeated clicks on the auth link do not stack intervals. The
         * poll also gives up after ten minutes so an abandoned popup does not
         * leave a timer running for the lifetime of the page.
         */
        function startAuthPoll() {
            const POLL_INTERVAL_MS = 1000;
            const MAX_POLL_MS = 10 * 60 * 1000;

            if (authPollTimer !== null) {
                clearInterval(authPollTimer);
            }

            const startedAt = Date.now();
            const timer = setInterval(() => {
                const authenticated = Boolean(localStorage.getItem('GITHUB_TOKEN'));
                if (authenticated) {
                    checkGithubAuth();
                }
                if (authenticated || Date.now() - startedAt > MAX_POLL_MS) {
                    clearInterval(timer);
                    // Only clear the shared handle if a newer poll has not replaced it.
                    if (authPollTimer === timer) {
                        authPollTimer = null;
                    }
                }
            }, POLL_INTERVAL_MS);
            authPollTimer = timer;
        }

        /** Open the GitHub auth popup and start watching for the token it stores. */
        function openGithubAuth() {
            window.open('https://tools.simonwillison.net/github-auth', 'github-auth', 'width=600,height=800');
            startAuthPoll();
        }

        // The auth anchor has no href, so browsers neither place it in the tab
        // order nor activate it from the keyboard. Expose it as a button and
        // handle Enter/Space so keyboard and screen-reader users can reach it.
        authLink.tabIndex = 0;
        authLink.setAttribute('role', 'button');
        authLink.addEventListener('click', openGithubAuth);
        authLink.addEventListener('keydown', (e) => {
            if (e.key === 'Enter' || e.key === ' ') {
                // Stops Space from scrolling the page (and any native activation).
                e.preventDefault();
                openGithubAuth();
            }
        });

        /**
         * Save the API response currently shown in the page as a public GitHub
         * Gist and display links to the Gist and to the audio player for it.
         *
         * Reads the token from localStorage under 'GITHUB_TOKEN'; without one it
         * only refreshes the auth controls. The stored token is discarded only
         * when GitHub rejects it, so a transient network or server failure does
         * not force the user to authenticate again. Any failure is shown in the
         * Gist links area as well as logged.
         *
         * @returns {Promise<void>}
         */
        async function createGist() {
            const token = localStorage.getItem('GITHUB_TOKEN');
            if (!token) {
                checkGithubAuth();
                return;
            }

            // Status codes treated as "this token cannot create gists".
            // NOTE(review): 404 is included on the assumption that GitHub answers
            // 404 when the token lacks the gist scope - confirm.
            const TOKEN_REJECTED = [401, 403, 404];

            // Build links through the DOM so response values are never parsed as HTML.
            const makeLink = (href, text) => {
                const link = document.createElement('a');
                link.href = href;
                link.target = '_blank';
                link.rel = 'noopener';
                link.textContent = text;
                return link;
            };

            try {
                saveGistBtn.disabled = true;
                saveGistBtn.textContent = 'Saving...';

                const response = await fetch('https://api.github.com/gists', {
                    method: 'POST',
                    headers: {
                        'Authorization': `token ${token}`,
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        description: 'GPT-4o audio response',
                        public: true,
                        files: {
                            'response.json': {
                                content: responseJson.value
                            }
                        }
                    })
                });

                if (!response.ok) {
                    if (TOKEN_REJECTED.includes(response.status)) {
                        localStorage.removeItem('GITHUB_TOKEN');
                        checkGithubAuth();
                    }
                    throw new Error(`Failed to create gist (HTTP ${response.status})`);
                }

                const data = await response.json();
                const playerUrl = `https://tools.simonwillison.net/gpt-4o-audio-player?gist=${encodeURIComponent(data.id)}`;

                gistLinks.replaceChildren(
                    makeLink(data.html_url, 'View Gist'),
                    makeLink(playerUrl, 'Audio player')
                );
            } catch (error) {
                console.error('Gist creation failed:', error);
                gistLinks.textContent = error.message || 'Failed to create gist';
            } finally {
                saveGistBtn.disabled = false;
                saveGistBtn.textContent = 'Save as Gist';
            }
        }

        saveGistBtn.addEventListener('click', createGist);

        /**
         * Return the OpenAI API key, prompting for it on first use.
         *
         * The key is cached in localStorage under 'openai_api_key'. Prompted
         * input is trimmed before it is stored, because stray whitespace from a
         * paste would otherwise end up inside the Authorization header.
         * Whitespace-only input is treated as no key and is not stored.
         *
         * @returns {string|null} The key; '' for blank input; null if the
         *     prompt was cancelled. Callers should test for falsiness.
         */
        function getAPIKey() {
            let apiKey = localStorage.getItem('openai_api_key');
            if (!apiKey) {
                const entered = prompt('Please enter your OpenAI API Key:');
                apiKey = entered === null ? null : entered.trim();
                if (apiKey) {
                    localStorage.setItem('openai_api_key', apiKey);
                }
            }
            return apiKey;
        }

        /**
         * Copy the raw API response to the clipboard, then show "Copied!" on the
         * (temporarily disabled) button for 1.5 seconds. A failed copy is logged
         * and leaves the button untouched.
         */
        async function copyResponseJson() {
            try {
                await navigator.clipboard.writeText(responseJson.value);
            } catch (err) {
                console.error('Failed to copy text:', err);
                return;
            }

            const previousLabel = copyJsonBtn.textContent;
            const restoreButton = () => {
                copyJsonBtn.textContent = previousLabel;
                copyJsonBtn.disabled = false;
            };

            copyJsonBtn.textContent = 'Copied!';
            copyJsonBtn.disabled = true;
            setTimeout(restoreButton, 1500);
        }

        copyJsonBtn.addEventListener('click', copyResponseJson);

        // Object URL of the clip currently loaded in the player. Kept so it can be
        // revoked when a newer clip replaces it; otherwise every generation would
        // pin another WAV blob in memory.
        let currentAudioUrl = null;

        /**
         * Decode a base64 string into its raw bytes.
         * @param {string} base64
         * @returns {Uint8Array}
         */
        function base64ToBytes(base64) {
            return Uint8Array.from(atob(base64), (ch) => ch.charCodeAt(0));
        }

        /**
         * Send the prompts to the chat completions endpoint with text + audio
         * output, then show the raw JSON, load the returned WAV into the player
         * and display its transcript.
         *
         * Does nothing while a request is already in flight, and refuses an
         * empty user prompt. Failures are reported through showError(), except a
         * successful response that lacks audio: that keeps the raw JSON visible
         * so it can be inspected.
         *
         * @returns {Promise<void>}
         */
        async function submitToAPI() {
            // The keyboard shortcut calls this function directly, so the button's
            // disabled state alone does not prevent a second concurrent request.
            if (submitBtn.disabled) {
                return;
            }

            if (!promptInput.value.trim()) {
                // Set directly rather than via showError() so earlier results stay visible.
                errorDiv.textContent = 'Please enter a prompt.';
                errorDiv.style.display = 'block';
                return;
            }

            const apiKey = getAPIKey();
            if (!apiKey) {
                alert('API Key is required.');
                return;
            }

            submitBtn.textContent = 'Processing...';
            submitBtn.disabled = true;

            const messages = [];
            if (systemPrompt.value.trim()) {
                messages.push({ role: 'system', content: systemPrompt.value });
            }
            messages.push({ role: 'user', content: promptInput.value });

            const payload = {
                model: modelSelect.value,
                modalities: ['text', 'audio'],
                audio: {
                    voice: voiceSelect.value,
                    format: 'wav'
                },
                messages: messages
            };

            try {
                const response = await fetch('https://api.openai.com/v1/chat/completions', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Authorization': `Bearer ${apiKey}`
                    },
                    body: JSON.stringify(payload)
                });

                // An error body is not guaranteed to be JSON; do not let a parse
                // failure mask the HTTP status.
                let data = null;
                try {
                    data = await response.json();
                } catch {
                    data = null;
                }

                if (!response.ok) {
                    if (response.status === 401) {
                        // Forget a rejected key so the next attempt prompts for a new one.
                        localStorage.removeItem('openai_api_key');
                    }
                    throw new Error(data?.error?.message || `API request failed (HTTP ${response.status})`);
                }
                if (!data) {
                    throw new Error('API returned a response that was not valid JSON');
                }

                responseJson.value = JSON.stringify(data, null, 2);
                jsonContainer.style.display = 'block';
                gistLinks.innerHTML = '';
                checkGithubAuth();

                const audio = data.choices?.[0]?.message?.audio;
                if (!audio?.data) {
                    // Keep the raw JSON on screen so the user can see what came back.
                    errorDiv.textContent = 'The API response did not include audio data.';
                    errorDiv.style.display = 'block';
                    playerContainer.style.display = 'none';
                    return;
                }

                const blob = new Blob([base64ToBytes(audio.data)], { type: 'audio/wav' });
                const audioUrl = URL.createObjectURL(blob);

                audioPlayer.src = audioUrl;
                // The player now points at the new clip, so the old URL can be released.
                if (currentAudioUrl) {
                    URL.revokeObjectURL(currentAudioUrl);
                }
                currentAudioUrl = audioUrl;

                transcriptDiv.textContent = audio.transcript ?? '';
                playerContainer.style.display = 'block';
                clearError();

                // Assigned (not added) so each generation replaces the previous handler.
                downloadBtn.onclick = () => {
                    const a = document.createElement('a');
                    a.href = audioUrl;
                    a.download = 'speech.wav';
                    document.body.appendChild(a);
                    a.click();
                    document.body.removeChild(a);
                };
            } catch (error) {
                console.error('Error:', error);
                showError(error.message || 'An error occurred');
            } finally {
                submitBtn.textContent = 'Generate Speech';
                submitBtn.disabled = false;
            }
        }

        submitBtn.addEventListener('click', submitToAPI);

        // Ctrl+Enter (Cmd+Enter on macOS) in the user prompt submits.
        // keydown is used because keypress is deprecated. Auto-repeat events and
        // presses made while a request is running (button disabled) are ignored,
        // so holding the keys cannot queue several requests.
        promptInput.addEventListener('keydown', (e) => {
            const isShortcut = e.key === 'Enter' && (e.ctrlKey || e.metaKey);
            if (isShortcut && !e.repeat && !submitBtn.disabled) {
                e.preventDefault();
                submitToAPI();
            }
        });

        // Initial GitHub auth check
        checkGithubAuth();
    </script>
</body>
</html>
